package com.yzq.os.spider.v.service.domain;

import java.io.File;
import java.sql.SQLException;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.ClassUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.time.DateFormatUtils;
import org.apache.commons.lang.time.DateUtils;
import org.apache.log4j.Logger;
import org.springframework.beans.BeanUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.ApplicationContext;
import org.springframework.stereotype.Service;

import com.yzq.os.spider.v.Constants;
import com.yzq.os.spider.v.dao.SpiderRecordDao;
import com.yzq.os.spider.v.domain.Record;
import com.yzq.os.spider.v.domain.ListPageConfig;
import com.yzq.os.spider.v.domain.QueryURL;
import com.yzq.os.spider.v.domain.SearchEngine;
import com.yzq.os.spider.v.service.CrawlService;
import com.yzq.os.spider.v.service.http.HttpClientService;
import com.yzq.os.spider.v.service.queryurl.CreateQueryURL;
import com.yzq.os.spider.v.service.spider.SpiderTask;
import com.yzq.os.spider.v.util.EncodeUtil;
import com.yzq.os.spider.v.util.Page;

@Service
public class SpiderRecordService {

	private static Logger logger = Logger.getLogger(SpiderRecordService.class);

	// Registry of currently running crawl threads, keyed by search engine id.
	// ConcurrentHashMap because it is mutated from multiple controller/scheduler threads.
	private static Map<Integer, CrawlService> runningThread = new ConcurrentHashMap<Integer, CrawlService>();

	public static final String MANUAL_CALL = "MANUAL_CALL";

	public static final String AUTOMATIC_CALL = "AUTOMATIC_CALL";

	/** Common prefix of every per-engine, per-day crawl job table. */
	public static final String BASE_TABLE_NAME_PREFIX = "crawl_jobs";

	/** Date suffix appended to table names, e.g. crawl_jobs_3_2014_01_31. */
	private static final String DATE_SUFFIX_PATTERN = "yyyy_MM_dd";

	@Autowired
	private SpiderRecordDao crawJobDao;

	@Autowired
	private SearchEngineService searchEngineService;

	@Autowired
	private SearchEngineParamService searchEngineParamService;

	@Autowired
	private ListPageConfigService listPageConfigService;

	@Value("${app.onlineDeleteCheckBeforeDay}")
	private int onlineDeleteCheckBeforeDay;

	@Value("${app.offlineDeleteCheckBeforeDay}")
	private int offlineDeleteCheckBeforeDay;

	/**
	 * Builds the crawl job table name for the given engine and a date string
	 * formatted with {@code Constants.DATE_PATTERN}.
	 *
	 * @throws ParseException if {@code date} does not match the expected pattern
	 */
	public static String parseTableName(int searchEngineId, String date)
			throws ParseException {
		Date d = DateUtils.parseDate(date,
				new String[] { Constants.DATE_PATTERN });
		return parseTableName(searchEngineId, d);
	}

	/**
	 * Builds the crawl job table name: PREFIX_engineId_yyyy_MM_dd.
	 */
	public static String parseTableName(int searchEngineId, Date date) {
		// StringBuilder: no concurrent access, so the synchronized StringBuffer
		// was unnecessary.
		StringBuilder tableName = new StringBuilder();
		tableName.append(BASE_TABLE_NAME_PREFIX);
		tableName.append("_");
		tableName.append(searchEngineId);
		tableName.append("_");
		tableName.append(DateFormatUtils.format(date, DATE_SUFFIX_PATTERN));
		return tableName.toString();
	}

	public boolean isExistTableOnlineDatabase(String tableName) {
		return crawJobDao.isExistTableOnlineDatabase(tableName);
	}

	public boolean isExistTableOfflineDatabase(String tableName) {
		return crawJobDao.isExistTableOfflineDatabase(tableName);
	}

	/**
	 * Creates the table in the online database if it does not exist yet.
	 * Synchronized so two concurrent crawls cannot both attempt the CREATE.
	 */
	public synchronized void makeSureExistTable(String tableName) {
		boolean isExistTable = this.isExistTableOnlineDatabase(tableName);
		if (!isExistTable) {
			this.createTable(tableName);
			logger.info("Table:[" + tableName + "] is not exist to create it.");
		}
	}

	public void batchSave(List<Record> jobInfos, Date crawlDate,
			String tableName) {
		crawJobDao.batchSave(crawlDate, tableName, jobInfos);
	}

	public List<Record> findBeforeJobs(String tableName, int maxReturn) {
		return crawJobDao.findRecords(tableName, maxReturn);
	}

	public List<String> findExistedTables() {
		return crawJobDao.findExistJobSaveTableNames(BASE_TABLE_NAME_PREFIX);
	}

	/**
	 * Returns the most recent crawl table for the engine, or null when none
	 * exists. Relies on the DAO returning names newest-first.
	 */
	public String findLastExistTable(int searchEngineId) {
		List<String> tableNames = crawJobDao
				.findExistenceWebsiteTableNames(searchEngineId);
		if (CollectionUtils.isNotEmpty(tableNames)) {
			return tableNames.get(0);
		} else {
			return null;
		}
	}

	public String[] crawledStatistics(int engineId, String tableName) {
		return crawJobDao.procStatistics(engineId, tableName);
	}

	private void createTable(String tableName) {
		crawJobDao.createTable(tableName);
	}

	/**
	 * Drops the table from the online database. Refuses (and logs an error)
	 * unless the name carries the crawl table prefix, as a safety net against
	 * dropping unrelated tables.
	 */
	public void dropOnlineTable(String tableName) {
		if (hasCrawlTablePrefix(tableName)) {
			crawJobDao.dropOnlineTable(tableName);
			logger.info("Droped online table name:[" + tableName + "]");
		} else {
			logger.error("Table name must start with ["
					+ BASE_TABLE_NAME_PREFIX + "] for drop.");
		}
	}

	/**
	 * Drops the table from the offline database; same prefix guard as
	 * {@link #dropOnlineTable(String)}.
	 */
	public void dropOfflineTable(String tableName) {
		if (hasCrawlTablePrefix(tableName)) {
			crawJobDao.dropOfflineTable(tableName);
			logger.info("Droped offline table name:[" + tableName + "]");
		} else {
			logger.error("Table name must start with ["
					+ BASE_TABLE_NAME_PREFIX + "] for drop.");
		}
	}

	// Shared safety guard for both drop methods.
	private boolean hasCrawlTablePrefix(String tableName) {
		return StringUtils.startsWithIgnoreCase(tableName,
				BASE_TABLE_NAME_PREFIX);
	}

	/**
	 * Runs one crawl task end-to-end against a single URL and records every
	 * intermediate step as a (description, detail) pair, for diagnosing a
	 * search engine configuration from the admin UI. Never throws: any failure
	 * is appended to the results instead.
	 *
	 * @param searchEngineId engine configuration to exercise
	 * @param postUrl        the search URL to test
	 * @return ordered list of {description, detail} string pairs
	 */
	@SuppressWarnings("unchecked")
	public List<String[]> test(int searchEngineId, String postUrl)
			throws ClassNotFoundException {
		// Load configuration from database.
		SearchEngine searchEngine = searchEngineService
				.findById(searchEngineId);
		ListPageConfig listPageConfig = listPageConfigService
				.findBySearchEngineId(searchEngineId);
		// Optionally rewrite the post URL through the engine's configured
		// CreateQueryURL implementation.
		String spellUrl = postUrl;
		String createQueryURLClass = searchEngine.getCreateQueryURLClass();
		if (StringUtils.isNotBlank(createQueryURLClass)) {
			CreateQueryURL createQueryURL = BeanUtils.instantiate(ClassUtils
					.getClass(createQueryURLClass));
			spellUrl = createQueryURL.toSpellUrl(postUrl);
		}
		QueryURL queryUrl = new QueryURL(searchEngineId, spellUrl, postUrl);
		// Assemble the crawl task from the engine's configured task class.
		HttpClientService httpClientService = Constants.getApplicationContext()
				.getBean("httpClientService", HttpClientService.class);
		SpiderTask task = BeanUtils.instantiate(ClassUtils.getClass(searchEngine
				.getSpiderTaskClass()));
		task.setSearchEngineService(searchEngineService);
		task.setSearchEngine(searchEngine);
		task.setSearchEngineParamService(searchEngineParamService);
		task.setListPageConfig(listPageConfig);
		task.setHttpClientService(httpClientService);
		task.setQueryURL(queryUrl);
		// Begin the step-by-step test run.
		List<String[]> testResults = new ArrayList<String[]>();
		String htmlSource = null;
		String jobInfoHtmlSource = null;
		try {
			// Pre-run initialization.
			task.initializationBeforeRun();
			putTestResult(testResults, "执行单个抓取任务前处理：", "完成！");
			// Replace placeholders in both URLs.
			String urlEncode = SearchEngineService.getEncode(searchEngine
					.getUrlEncode());
			String beforeSpellUrl = queryUrl.getSpellUrl();
			queryUrl.setSpellUrl(searchEngineParamService.replaceHolder(
					queryUrl.getSpellUrl(), urlEncode));
			queryUrl.setPostUrl(searchEngineParamService.replaceHolder(
					queryUrl.getPostUrl(), urlEncode));
			String afterSpellUrl = queryUrl.getSpellUrl();
			putTestResult(testResults, "替换URL中的占位符(SpellUrl)：", "Before:["
					+ beforeSpellUrl + "] After:[" + afterSpellUrl + "]");
			// Let the task rewrite the search (post) URL.
			String beforePostUrl = queryUrl.getPostUrl();
			task.reWriteQueryUrl();
			String afterPostUrl = queryUrl.getPostUrl();
			putTestResult(testResults, "重新修饰搜索URL(PostUrl)：", "Before:["
					+ beforePostUrl + "] After:[" + afterPostUrl + "]");
			// Fetch the HTML source and save a copy to a temp file for
			// offline inspection.
			htmlSource = task.crawlPostUrlHtmlSource();
			String file = FileUtils.getTempDirectoryPath() + File.separator
					+ DateFormatUtils.format(new Date(), "yyyyMMddHHmmss")
					+ "_" + searchEngineId + ".txt";
			FileUtils.writeStringToFile(new File(file), htmlSource, "GBK");
			putTestResult(testResults, "Http获取html源代码：", "html代码长度:["
					+ StringUtils.length(htmlSource) + "] save file:[" + file
					+ "]");
			task.setPostUrlHtmlSource(htmlSource);
			// Extract the HTML fragment that contains the record list.
			jobInfoHtmlSource = task.sectionContainsJobsHtmlSource();
			putTestResult(testResults, "获取包含数据列表的Html代码部分：", "html代码长度:["
					+ StringUtils.length(jobInfoHtmlSource) + "]");
			task.setContainsJobsHtmlSource(jobInfoHtmlSource);
			// Is this a "no search results" page?
			boolean isNoResultPage = task.isNoSearchResultsPage();
			putTestResult(testResults, "判断是否是“没有搜索结果页面”：", "[" + isNoResultPage
					+ "]");
			// Is this the first page of the result listing?
			boolean firstPage = task.isFirstListPage();
			putTestResult(testResults, "判断是否是第一个分页：",
					"[" + String.valueOf(firstPage) + "]");
			// Total record count as displayed on the page.
			Integer returnRecordNum = task.extractReturnRecordNum();
			putTestResult(testResults, "结果页面中显示的“返回记录数量”：", "["
					+ returnRecordNum + "]");
			// Does the count exceed the engine's maximum displayable records?
			Boolean tooLargeReturn = task.largeThanMaxReturn();
			putTestResult(
					testResults,
					"是否返回记录数超过最大显示数量限制：",
					"返回数:[" + returnRecordNum + "],最大返回数:["
							+ listPageConfig.getMaxRecordNum() + "] 结果:["
							+ tooLargeReturn + "]");
			// Extract the record list from the current page.
			List<Record> crawlJobs = task.extractJobs();
			putTestResult(testResults, "获取本页显示的数据列表：", "获取的数据数量:["
					+ CollectionUtils.size(crawlJobs) + "]");
			// Render the extracted records as an HTML table.
			putTestResult(testResults, "数据列表明细:",
					makeJobsTableHtml(searchEngine, crawlJobs));
			// Is "calculated paging" forced by configuration?
			boolean forceUseCalculateTurnPage = task
					.getForceUseCalculateTurnPage();
			putTestResult(testResults, "配置中是否使用“计算分页”:",
					String.valueOf(forceUseCalculateTurnPage));
			// Current page number as shown on the page.
			Integer currentPageNo = task.extractCurrentPageNo();
			putTestResult(testResults, "当前列表的页码数:",
					currentPageNo != null ? String.valueOf(currentPageNo)
							: "null");
			// Configured page size.
			int pageSize = listPageConfig.getPageSize();
			putTestResult(testResults, "配置的每页显示数量:", String.valueOf(pageSize));
			// Total page count (only computable when the record count is known).
			Integer totalPageNum = null;
			if (returnRecordNum != null) {
				totalPageNum = Page.getAnyTotalPageCount(returnRecordNum,
						pageSize);
				putTestResult(testResults, "总页数：",
						totalPageNum != null ? String.valueOf(totalPageNum)
								: "null");
			} else {
				putTestResult(testResults, "总页数：", "因为返回记录数量是空的，所以无法计算出来。");
			}
			// Does a next page exist?
			Boolean hasNextPage = task.hasNextPage();
			putTestResult(testResults, "是否有下一页存在",
					hasNextPage != null ? String.valueOf(hasNextPage) : "null");

			// Branch 1: "calculated paging" — compute URLs for pages 2..N.
			if (firstPage && (tooLargeReturn == null || !tooLargeReturn)
					&& returnRecordNum != null
					&& (hasNextPage == null || hasNextPage)) {
				List<String> spellUrls = task
						.makeSecondStartNextSpellUrl(totalPageNum);
				putTestResult(testResults, "计算从第二页开始到最后一页URL地址：", "数量:["
						+ CollectionUtils.size(spellUrls) + "]");
				putTestResult(
						testResults,
						"计算从第二页开始到最后一页URL地址：",
						"第一个URL(SpellUrl):["
								+ (CollectionUtils.isNotEmpty(spellUrls) ? spellUrls
										.get(0) : "") + "]");
				if (CollectionUtils.size(spellUrls) >= 2) {
					putTestResult(testResults, "计算从第二页开始到最后一页URL地址：",
							"第二个URL(SpellUrl):[" + spellUrls.get(1) + "]");
				}
			}
			// Branch 2: too many results — generate narrowed (qualified) URLs.
			if (firstPage && (tooLargeReturn != null && tooLargeReturn)
					&& (hasNextPage == null || hasNextPage)) {
				List<QueryURL> qualifiedURLs = task
						.generateQualifiedURLs(queryUrl);
				putTestResult(testResults, "生成限定搜索URL", "数量:["
						+ CollectionUtils.size(qualifiedURLs) + "]");
				putTestResult(
						testResults,
						"生成限定搜索URL",
						"第一个URL(SpellUrl):["
								+ (CollectionUtils.isNotEmpty(qualifiedURLs) ? qualifiedURLs
										.get(0).getSpellUrl() : "") + "]");
				if (CollectionUtils.size(qualifiedURLs) >= 2) {
					// FIX: print the spell URL, not the QueryURL object's
					// toString(), matching the first-URL line above.
					putTestResult(testResults, "生成限定搜索URL",
							"第二个URL(SpellUrl):["
									+ qualifiedURLs.get(1).getSpellUrl() + "]");
				}
			}
			// Branch 3: "iterative paging" — resolve the next page URL.
			if ((tooLargeReturn == null || !tooLargeReturn)
					&& (hasNextPage == null || hasNextPage)
					&& !forceUseCalculateTurnPage) {
				String nextSpellUrl = task.makeNextSpellUrl();
				putTestResult(testResults, "“迭代分页”获取下一页的URL地址:", nextSpellUrl);
			}
		} catch (Exception e) {
			// Log both HTML snapshots to help diagnose parser failures, then
			// surface the failure as a test-result row instead of rethrowing.
			logger.error(
					"htmlSource:[" + EncodeUtil.gbk2iso(htmlSource)
							+ "],jobInfoHtmlSource:["
							+ EncodeUtil.gbk2iso(jobInfoHtmlSource) + "]", e);
			putTestResult(testResults, "Exception", e.getMessage());
		}
		return testResults;
	}

	/**
	 * Starts a crawl thread for the engine unless one is already registered.
	 * Ensures the day's storage table exists before launching.
	 *
	 * @param callType MANUAL_CALL or AUTOMATIC_CALL, for logging only
	 */
	public void callSpider(String callType, int searchEngineId, Date markDate,
			int findUrlSize, int minThreadNum, int maxThreadNum,
			int arrayQueueSize) {

		if (!haveRunningThread(searchEngineId)) {

			String jobSaveTableName = parseTableName(searchEngineId, markDate);
			makeSureExistTable(jobSaveTableName);

			ApplicationContext context = Constants.getApplicationContext();
			CrawlService thread = context.getBean("crawlService",
					CrawlService.class);
			thread.setSearchEngineId(searchEngineId);
			thread.setMarkDate(markDate);
			thread.setJobSaveTableName(jobSaveTableName);
			thread.setFindUrlSize(findUrlSize);
			thread.initializeThreadPoolExecutor(minThreadNum, maxThreadNum,
					arrayQueueSize);
			thread.start();

			putRunning(searchEngineId, thread);
			logger.info("[" + callType + "] crawl searchEngine["
					+ searchEngineId + "] markDate["
					+ DateFormatUtils.format(markDate, "yyyy-MM-dd") + "] ["
					+ findUrlSize + "/" + minThreadNum + "/" + maxThreadNum
					+ "/" + arrayQueueSize + "] process is running.");
		} else {
			logger.info("This crawl searchEngineId:[" + searchEngineId
					+ "] is running. crawl will return.");
		}
	}

	private static void putRunning(int searchEngineId, CrawlService service) {
		runningThread.put(searchEngineId, service);
	}

	/** True when a crawl thread is registered for the engine. */
	public static boolean haveRunningThread(int searchEngineId) {
		return takeRunning(searchEngineId) != null;
	}

	/** True when a crawl thread is registered AND still flagged runnable. */
	public static boolean haveRunningThreadAndRunnable(int searchEngineId) {
		CrawlService crawlThread = takeRunning(searchEngineId);
		return crawlThread != null && crawlThread.getRunnable();
	}

	public static CrawlService takeRunning(int searchEngineId) {
		return runningThread.get(searchEngineId);
	}

	public static void removeRunning(int searchEngineId) {
		runningThread.remove(searchEngineId);
	}

	public void updateSplitDoFlag(String tableName, List<Record> crawJobs,
			int targetDoFlag) {
		crawJobDao.updateSplitDoFlag(tableName, crawJobs, targetDoFlag);
	}

	/**
	 * Appends one (description, detail) pair to the results and mirrors it to
	 * the log (GBK->ISO transcoded so the console renders it correctly).
	 */
	private void putTestResult(List<String[]> testResults, String desc,
			String info) {
		String[] msg = new String[] { desc, info };
		testResults.add(msg);
		logger.info(EncodeUtil.gbk2iso("DESCRIBE:[" + desc + "],INFOMATION:["
				+ info + "]"));
	}

	/**
	 * Renders the extracted records as an HTML table for the test UI. The
	 * gray/red cells mark the fields that feed the MD5 used to identify a
	 * record uniquely.
	 */
	private String makeJobsTableHtml(SearchEngine searchEngine,
			List<Record> crawlJobs) {
		StringBuilder table = new StringBuilder();
		// head
		table.append(" <table border=\"1\"> ");
		table.append(" 	<tr style=\"color: blue;\"> ");
		table.append(" 		<td style='color:red;background-color:gray;'>网站ID</td> ");
		table.append(" 		<td style='color:red;background-color:gray;'>发布数据公司名称</td> ");
		table.append(" 		<td style='color:red;background-color:gray;'>数据名称</td> ");
		table.append(" 		<td style='color:red;background-color:gray;'>发布城市（字符）</td> ");
		table.append(" 		<td>记录ID</td> ");
		table.append(" 		<td>发布记录公司ID</td> ");
		table.append(" 		<td>参数1</td> ");
		table.append(" 		<td>参数2</td> ");
		table.append(" 		<td>参数3</td> ");
		table.append(" 		<td>发布日期</td> ");
		table.append(" 		<td>类型</td> ");
		table.append(" 	</tr> ");
		// body
		for (Record record : crawlJobs) {
			table.append(" 	<tr> ");
			table.append(" 		<td style='color:red;background-color:gray;'>"
					+ searchEngine.getWebsiteId() + "</td> ");
			table.append(" 		<td style='color:red;background-color:gray;'><a href=\""
					+ record.getCompanyLinkURL()
					+ "\" target=\"_blank\">"
					+ record.getCompanyName() + "</a></td> ");
			table.append(" 		<td style='color:red;background-color:gray;'><a href=\""
					+ record.getJobLinkURL()
					+ "\" target=\"_blank\">"
					+ record.getJobTitle() + "</a></td> ");
			table.append(" 		<td style='color:red;background-color:gray;'>"
					+ record.getCityText() + "</td> ");
			table.append(" 		<td>" + record.getCmptrJobId() + "</td> ");
			table.append(" 		<td>" + record.getCmpCompanyId() + "</td> ");
			table.append(" 		<td>" + record.getJobTypeCode() + "</td> ");
			table.append(" 		<td>" + record.getIndustryCode() + "</td> ");
			table.append(" 		<td>" + record.getCityCode() + "</td> ");
			// Guard against a record without a date: DateFormatUtils.format
			// would throw and abort the whole diagnostic table.
			table.append(" 		<td>"
					+ (record.getJobDate() != null ? DateFormatUtils.format(
							record.getJobDate(), "yyyy-MM-dd") : "null")
					+ "</td> ");
			table.append(" 		<td>" + record.getJobType() + "</td> ");
			table.append(" 	</tr> ");
		}
		// foot
		table.append(" 	<tr> ");
		table.append(" 		<td colspan=\"12\">页面显示数据条数:["
				+ CollectionUtils.size(crawlJobs)
				+ "],灰底部分是创建MD5的属性（用来标识唯一数据）</td> ");
		table.append(" 	</tr> ");
		table.append(" </table> ");

		return table.toString();
	}

	public List<String> findOnlineJobTableNames(Date beforeDate) {
		return crawJobDao.findOnlineJobTableNames(beforeDate);
	}

	public List<String> findOfflineJobTableNames(Date beforeDate) {
		return crawJobDao.findOfflineJobTableNames(beforeDate);
	}

	public int countOnlineTable(String tableName) {
		return crawJobDao.countOnlineTable(tableName);
	}

	public int countOfflineTable(String tableName) {
		return crawJobDao.countOfflineTable(tableName);
	}

	/**
	 * Checks whether the online and offline JDBC connections point at the same
	 * database. On failure the check conservatively reports {@code true} so
	 * that callers do not perform cross-database work on an unknown setup.
	 */
	public boolean checkJdbcConnectionIsSame() {
		boolean returnValue = false;
		try {
			returnValue = crawJobDao.checkJdbcConnectionIsSame();
		} catch (SQLException e) {
			// Defect fix: the previous empty message logged a bare stack trace
			// with no context.
			logger.error("Failed to compare online/offline JDBC connections;"
					+ " assuming they are the same.", e);
			returnValue = true;
		}
		return returnValue;
	}

	public int countBySql(String sql) {
		return crawJobDao.countBySql(sql);
	}

	public List<String> findAllCompanyNames(String jobSaveTableName) {
		return crawJobDao.findAllCompanyNames(jobSaveTableName);
	}
}
